Libraries

library(lattice)
library(ISLR)
library(MASS)
library(caret)
Loading required package: ggplot2
library(tidyverse)
── Attaching packages ────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ tibble  2.0.1     ✔ purrr   0.3.2
✔ tidyr   0.8.3     ✔ dplyr   0.8.3
✔ readr   1.3.1     ✔ stringr 1.3.1
✔ tibble  2.0.1     ✔ forcats 0.4.0
── Conflicts ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ purrr::lift()   masks caret::lift()
✖ dplyr::select() masks MASS::select()
library(rpart)
library(plyr); library(dplyr)
-------------------------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
-------------------------------------------------------------------------------------------------------------

Attaching package: ‘plyr’

The following objects are masked from ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize

The following object is masked from ‘package:purrr’:

    compact
library(caret)
library(rattle)                 # Fancy tree plot
Rattle: A free graphical interface for data science with R.
Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot) 
library(dplyr)
library(parallel)
library(Hmisc)
Loading required package: survival

Attaching package: ‘survival’

The following object is masked _by_ ‘.GlobalEnv’:

    heart

The following object is masked from ‘package:rpart’:

    solder

The following object is masked from ‘package:caret’:

    cluster

Loading required package: Formula

Attaching package: ‘Hmisc’

The following objects are masked from ‘package:plyr’:

    is.discrete, summarize

The following objects are masked from ‘package:dplyr’:

    src, summarize

The following object is masked from ‘package:e1071’:

    impute

The following objects are masked from ‘package:base’:

    format.pval, units
library(e1071)
library(pROC)
Type 'citation("pROC")' for a citation.

Attaching package: ‘pROC’

The following objects are masked from ‘package:stats’:

    cov, smooth, var
library(ggplot2)
library(rpart.plot)
library(VIM)
Loading required package: colorspace

Attaching package: ‘colorspace’

The following object is masked from ‘package:pROC’:

    coords

Loading required package: grid
Loading required package: data.table
data.table 1.12.2 using 4 threads (see ?getDTthreads).  Latest news: r-datatable.com

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose

VIM is ready to use. 
 Since version 4.0.0 the GUI is in its own package VIMGUI.

          Please use the package to use the new (and old) GUI.

Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues

Attaching package: ‘VIM’

The following object is masked from ‘package:datasets’:

    sleep
library(mice)

Attaching package: ‘mice’

The following object is masked from ‘package:tidyr’:

    complete

The following objects are masked from ‘package:base’:

    cbind, rbind
# Reading the dataset
nbi <- read.csv('../../../data/trees/decision-tree-dataset/decision_tree.csv')
# Select attributesk
df <- nbi %>% dplyr::select(adt.cat, adtt.cat, material, state, structure.number, structure.type, type.of.wearing.surface, current.deck, current.substructure, current.superstructure, total.deck.intervention, total.sub.intervention, total.super.intervention, deck.intervention.in.next.3.years, sub.intervention.in.next.3.years, super.intervention.in.next.3.years, precipitation, snowfall, freezethaw, score)

Dataset to model deck of the bridges

# Select attributes
df_deck <- nbi %>% dplyr::select(adt.cat, adtt.cat, material, structure.type, type.of.wearing.surface, current.deck, current.substructure, current.superstructure, total.deck.intervention, total.sub.intervention, total.super.intervention,  precipitation, snowfall, freezethaw, score, deck.intervention.in.next.3.years)
# Remove null values
df_deck <- na.omit(df_deck)

Preview of the data

head(df_deck)

Training and testing Deck

target_variable <- 'deck.intervention.in.next.3.years'
index = createDataPartition(y=df_deck[[target_variable]], p=0.7, list=FALSE)
train.set = df_deck[index,]
test.set = df_deck[-index,]
positive_class = 'No'
negative_class = 'Yes' 
reset.seed <- function()
{
  # ensure results are repeatable
  set.seed(1337)
}
library(doParallel)
num_cores <- detectCores() #note: you can specify a smaller number if you want
cl <- makePSOCKcluster(num_cores)
registerDoParallel(cl)
reset.seed()
model <- deck.intervention.in.next.3.years ~ adt.cat + adtt.cat + material + structure.type + type.of.wearing.surface + current.deck + current.substructure + current.superstructure + total.deck.intervention + total.sub.intervention + total.super.intervention +  precipitation + snowfall + freezethaw + score
tunelengths = seq(from=5, to=10, by=5)
list_sens <- c()
list_spec <- c()
list_f1 <-c()
list_tl <-c()
list_kappa <-c()
list_auc <- c()
probabilities_dt <- data.frame(No=double(), Yes=double())
for(tl in tunelengths) {
rtree_model = train( model,
                 data = train.set,
                 method = "rpart",
                 trControl = trainControl(method = "repeatedcv", search = 'random', repeats = 5,
                                          summaryFunction = twoClassSummary,
                                          classProbs = T, savePredictions = T),  tuneLength = tl,
                 metric='ROC')
rtree_model
# Predict on the training set
tree_class_test <- rtree_model%>% predict(newdata = test.set, type = 'raw')
tree_prob_test <- rtree_model%>% predict(newdata = test.set, type = 'prob')
# Confusion Matrix
metrics <- confusionMatrix(tree_class_test, test.set[[target_variable]])
metricsbyclass <- metrics$byClass
sens <- type.convert(metricsbyclass[1])
list_sens <- c(list_sens, sens)
spec <- type.convert(metricsbyclass[2])
list_spec <- c(list_spec, spec)
f1 <- type.convert(metricsbyclass[7])
list_f1 <- c(list_f1, f1)
tunelen <- tl
list_tl <- c(list_tl, tunelen)
kappa <- type.convert(metrics$overall[2])
list_kappa <- c(list_kappa, kappa)
# Confusion matrix
area <- roc(test.set[[target_variable]], tree_prob_test[[positive_class]], plot = TRUE, print.auc = TRUE, legacy.axes = TRUE)
list_auc <- c(list_auc, area$auc)
# rpart model
rpart.plot(rtree_model$finalModel)
filename <-paste('../../../data/trees/models/deck/tree-deck-no-undersample', toString(tl), '.csv', collapse = '')
write.csv(rpart.rules(rtree_model$finalModel, roundint=FALSE, clip.facs=TRUE), filename)
# Probability values
tree_prob_test['Tunelength'] <- rep(tl, length(tree_class_test))
tree_prob_test['StructureNumber'] <- df[rownames(tree_prob_test),]$structure.number
# Concatenate
probabilities_dt <- bind_rows(probabilities_dt, tree_prob_test)
# metrics
metrics
}
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 18 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 17 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 16 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 15 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 14 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 13 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 12 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
  closing unused connection 11 (<-localhost:11814)
Setting levels: control = No, case = Yes
Setting direction: controls > cases

df_metric_dt <- data.frame(list_sens, list_spec, list_f1, list_tl, list_kappa, list_auc)
names(df_metric_dt) <- c('Sensitivity', 'Specificity', 'F1','Tunelength', 'Kappa', 'AUC')
# Writing outputs
write.csv(probabilities_dt, '../../../data/trees/metrics/dt/tree-prob-deck-nou.csv')
write.csv(probabilities_dt, '../../../data/trees/metrics/dt/tree-metric-deck-nou.csv')
write.csv(df_metric_dt, '../../../data/trees/metrics/dt/tree-metric-deck-nou.csv')
df_metric_dt
probabilities_dt
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKYXV0aG9yOiBBa3NoYXkgS2FsZQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKZGVzY3JpcHRpb246IERlY2lzaW9uIHRyZWUgbW9kZWxpbmcgb2YgZGVjayB3aXRob3V0IHVuZGVyc2FtcGxpbmcuCi0tLQoKIyMjIyAqTGlicmFyaWVzKgpgYGB7cn0KbGlicmFyeShsYXR0aWNlKQpsaWJyYXJ5KElTTFIpCmxpYnJhcnkoTUFTUykKbGlicmFyeShjYXJldCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocnBhcnQpCmxpYnJhcnkocGx5cik7IGxpYnJhcnkoZHBseXIpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkocmF0dGxlKSAgICAgICAgICAgICAgICAgIyBGYW5jeSB0cmVlIHBsb3QKbGlicmFyeShycGFydC5wbG90KSAKbGlicmFyeShkcGx5cikKbGlicmFyeShwYXJhbGxlbCkKbGlicmFyeShIbWlzYykKbGlicmFyeShlMTA3MSkKbGlicmFyeShwUk9DKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocnBhcnQucGxvdCkKbGlicmFyeShWSU0pCmxpYnJhcnkobWljZSkKCiMgUmVhZGluZyB0aGUgZGF0YXNldApuYmkgPC0gcmVhZC5jc3YoJy4uLy4uLy4uL2RhdGEvdHJlZXMvZGVjaXNpb24tdHJlZS1kYXRhc2V0L2RlY2lzaW9uX3RyZWUuY3N2JykKCiMgU2VsZWN0IGF0dHJpYnV0ZXNrCmRmIDwtIG5iaSAlPiUgZHBseXI6OnNlbGVjdChhZHQuY2F0LCBhZHR0LmNhdCwgbWF0ZXJpYWwsIHN0YXRlLCBzdHJ1Y3R1cmUubnVtYmVyLCBzdHJ1Y3R1cmUudHlwZSwgdHlwZS5vZi53ZWFyaW5nLnN1cmZhY2UsIGN1cnJlbnQuZGVjaywgY3VycmVudC5zdWJzdHJ1Y3R1cmUsIGN1cnJlbnQuc3VwZXJzdHJ1Y3R1cmUsIHRvdGFsLmRlY2suaW50ZXJ2ZW50aW9uLCB0b3RhbC5zdWIuaW50ZXJ2ZW50aW9uLCB0b3RhbC5zdXBlci5pbnRlcnZlbnRpb24sIGRlY2suaW50ZXJ2ZW50aW9uLmluLm5leHQuMy55ZWFycywgc3ViLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMsIHN1cGVyLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMsIHByZWNpcGl0YXRpb24sIHNub3dmYWxsLCBmcmVlemV0aGF3LCBzY29yZSkKYGBgCgojIyMjICpEYXRhc2V0IHRvIG1vZGVsIGRlY2sgb2YgdGhlIGJyaWRnZXMqCmBgYHtyfQojIFNlbGVjdCBhdHRyaWJ1dGVzCmRmX2RlY2sgPC0gbmJpICU+JSBkcGx5cjo6c2VsZWN0KGFkdC5jYXQsIGFkdHQuY2F0LCBtYXRlcmlhbCwgc3RydWN0dXJlLnR5cGUsIHR5cGUub2Yud2VhcmluZy5zdXJmYWNlLCBjdXJyZW50LmRlY2ssIGN1cnJlbnQuc3Vic3RydWN0dXJlLCBjdXJyZW50LnN1cGVyc3RydWN0dXJlLCB0b3RhbC5kZWNrLmludGVydmVudGlvbiwgdG90YWwuc3ViLmludGVydmVudGlvbiwgdG90YWwuc3VwZXIuaW50ZXJ2ZW50aW9uLCAgcHJlY2lwaXRhdGlvbiwgc25vd2ZhbGwsIGZyZWV6ZXRoYXcsIHNjb3JlLCBkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMpCgojIFJlbW92ZSBudWxsIHZhbHVlcwpkZl9kZWNrIDwtIG5hLm9taXQoZGZfZGVjaykKYGBgCgojIyMjICpQcmV2aWV3IG9mIHRoZSBkYXRhKgpgYGB7cn0KaGVhZChkZl9kZWNrKQpgYGAKCiMjIyMgKlRyYWluaW5nIGFuZCB0ZXN0aW5nIERlY2sqCmBgYHtyfQp0YXJnZXRfdmFyaWFibGUgPC0gJ2RlY2suaW50ZXJ2ZW50aW9uLmluLm5leHQuMy55ZWFycycKaW5kZXggPSBjcmVhdGVEYXRhUGFydGl0aW9uKHk9ZGZfZGVja1tbdGFyZ2V0X3ZhcmlhYmxlXV0sIHA9MC43LCBsaXN0PUZBTFNFKQp0cmFpbi5zZXQgPSBkZl9kZWNrW2luZGV4LF0KdGVzdC5zZXQgPSBkZl9kZWNrWy1pbmRleCxdCgpwb3NpdGl2ZV9jbGFzcyA9ICdObycKbmVnYXRpdmVfY2xhc3MgPSAnWWVzJyAKCnJlc2V0LnNlZWQgPC0gZnVuY3Rpb24oKQp7CiAgIyBlbnN1cmUgcmVzdWx0cyBhcmUgcmVwZWF0YWJsZQogIHNldC5zZWVkKDEzMzcpCn0KbGlicmFyeShkb1BhcmFsbGVsKQpudW1fY29yZXMgPC0gZGV0ZWN0Q29yZXMoKSAjbm90ZTogeW91IGNhbiBzcGVjaWZ5IGEgc21hbGxlciBudW1iZXIgaWYgeW91IHdhbnQKY2wgPC0gbWFrZVBTT0NLY2x1c3RlcihudW1fY29yZXMpCnJlZ2lzdGVyRG9QYXJhbGxlbChjbCkKCnJlc2V0LnNlZWQoKQptb2RlbCA8LSBkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMgfiBhZHQuY2F0ICsgYWR0dC5jYXQgKyBtYXRlcmlhbCArIHN0cnVjdHVyZS50eXBlICsgdHlwZS5vZi53ZWFyaW5nLnN1cmZhY2UgKyBjdXJyZW50LmRlY2sgKyBjdXJyZW50LnN1YnN0cnVjdHVyZSArIGN1cnJlbnQuc3VwZXJzdHJ1Y3R1cmUgKyB0b3RhbC5kZWNrLmludGVydmVudGlvbiArIHRvdGFsLnN1Yi5pbnRlcnZlbnRpb24gKyB0b3RhbC5zdXBlci5pbnRlcnZlbnRpb24gKyAgcHJlY2lwaXRhdGlvbiArIHNub3dmYWxsICsgZnJlZXpldGhhdyArIHNjb3JlCgp0dW5lbGVuZ3RocyA9IHNlcShmcm9tPTUsIHRvPTEwMCwgYnk9NSkKbGlzdF9zZW5zIDwtIGMoKQpsaXN0X3NwZWMgPC0gYygpCmxpc3RfZjEgPC1jKCkKbGlzdF90bCA8LWMoKQpsaXN0X2thcHBhIDwtYygpCmxpc3RfYXVjIDwtIGMoKQpwcm9iYWJpbGl0aWVzX2R0IDwtIGRhdGEuZnJhbWUoTm89ZG91YmxlKCksIFllcz1kb3VibGUoKSkKCmZvcih0bCBpbiB0dW5lbGVuZ3RocykgewpydHJlZV9tb2RlbCA9IHRyYWluKCBtb2RlbCwKICAgICAgICAgICAgICAgICBkYXRhID0gdHJhaW4uc2V0LAogICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJycGFydCIsCiAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gdHJhaW5Db250cm9sKG1ldGhvZCA9ICJyZXBlYXRlZGN2Iiwgc2VhcmNoID0gJ3JhbmRvbScsIHJlcGVhdHMgPSA1LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdW1tYXJ5RnVuY3Rpb24gPSB0d29DbGFzc1N1bW1hcnksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNsYXNzUHJvYnMgPSBULCBzYXZlUHJlZGljdGlvbnMgPSBUKSwgIHR1bmVMZW5ndGggPSB0bCwKICAgICAgICAgICAgICAgICBtZXRyaWM9J1JPQycpCgoKcnRyZWVfbW9kZWwKCiMgUHJlZGljdCBvbiB0aGUgdHJhaW5pbmcgc2V0CnRyZWVfY2xhc3NfdGVzdCA8LSBydHJlZV9tb2RlbCU+JSBwcmVkaWN0KG5ld2RhdGEgPSB0ZXN0LnNldCwgdHlwZSA9ICdyYXcnKQp0cmVlX3Byb2JfdGVzdCA8LSBydHJlZV9tb2RlbCU+JSBwcmVkaWN0KG5ld2RhdGEgPSB0ZXN0LnNldCwgdHlwZSA9ICdwcm9iJykKCiMgQ29uZnVzaW9uIE1hdHJpeAptZXRyaWNzIDwtIGNvbmZ1c2lvbk1hdHJpeCh0cmVlX2NsYXNzX3Rlc3QsIHRlc3Quc2V0W1t0YXJnZXRfdmFyaWFibGVdXSkKbWV0cmljc2J5Y2xhc3MgPC0gbWV0cmljcyRieUNsYXNzCgpzZW5zIDwtIHR5cGUuY29udmVydChtZXRyaWNzYnljbGFzc1sxXSkKbGlzdF9zZW5zIDwtIGMobGlzdF9zZW5zLCBzZW5zKQoKc3BlYyA8LSB0eXBlLmNvbnZlcnQobWV0cmljc2J5Y2xhc3NbMl0pCmxpc3Rfc3BlYyA8LSBjKGxpc3Rfc3BlYywgc3BlYykKCmYxIDwtIHR5cGUuY29udmVydChtZXRyaWNzYnljbGFzc1s3XSkKbGlzdF9mMSA8LSBjKGxpc3RfZjEsIGYxKQoKdHVuZWxlbiA8LSB0bApsaXN0X3RsIDwtIGMobGlzdF90bCwgdHVuZWxlbikKCmthcHBhIDwtIHR5cGUuY29udmVydChtZXRyaWNzJG92ZXJhbGxbMl0pCmxpc3Rfa2FwcGEgPC0gYyhsaXN0X2thcHBhLCBrYXBwYSkKCiMgQ29uZnVzaW9uIG1hdHJpeAphcmVhIDwtIHJvYyh0ZXN0LnNldFtbdGFyZ2V0X3ZhcmlhYmxlXV0sIHRyZWVfcHJvYl90ZXN0W1twb3NpdGl2ZV9jbGFzc11dLCBwbG90ID0gVFJVRSwgcHJpbnQuYXVjID0gVFJVRSwgbGVnYWN5LmF4ZXMgPSBUUlVFKQpsaXN0X2F1YyA8LSBjKGxpc3RfYXVjLCBhcmVhJGF1YykKCiMgcnBhcnQgbW9kZWwKcnBhcnQucGxvdChydHJlZV9tb2RlbCRmaW5hbE1vZGVsKQpmaWxlbmFtZSA8LXBhc3RlKCcuLi8uLi8uLi9kYXRhL3RyZWVzL21vZGVscy9kZWNrL3RyZWUtZGVjay1uby11bmRlcnNhbXBsZScsIHRvU3RyaW5nKHRsKSwgJy5jc3YnLCBjb2xsYXBzZSA9ICcnKQp3cml0ZS5jc3YocnBhcnQucnVsZXMocnRyZWVfbW9kZWwkZmluYWxNb2RlbCwgcm91bmRpbnQ9RkFMU0UsIGNsaXAuZmFjcz1UUlVFKSwgZmlsZW5hbWUpCgojIFByb2JhYmlsaXR5IHZhbHVlcwp0cmVlX3Byb2JfdGVzdFsnVHVuZWxlbmd0aCddIDwtIHJlcCh0bCwgbGVuZ3RoKHRyZWVfY2xhc3NfdGVzdCkpCnRyZWVfcHJvYl90ZXN0WydTdHJ1Y3R1cmVOdW1iZXInXSA8LSBkZltyb3duYW1lcyh0cmVlX3Byb2JfdGVzdCksXSRzdHJ1Y3R1cmUubnVtYmVyCgojIENvbmNhdGVuYXRlCnByb2JhYmlsaXRpZXNfZHQgPC0gYmluZF9yb3dzKHByb2JhYmlsaXRpZXNfZHQsIHRyZWVfcHJvYl90ZXN0KQoKIyBtZXRyaWNzCm1ldHJpY3MKfQoKZGZfbWV0cmljX2R0IDwtIGRhdGEuZnJhbWUobGlzdF9zZW5zLCBsaXN0X3NwZWMsIGxpc3RfZjEsIGxpc3RfdGwsIGxpc3Rfa2FwcGEsIGxpc3RfYXVjKQpuYW1lcyhkZl9tZXRyaWNfZHQpIDwtIGMoJ1NlbnNpdGl2aXR5JywgJ1NwZWNpZmljaXR5JywgJ0YxJywnVHVuZWxlbmd0aCcsICdLYXBwYScsICdBVUMnKQoKIyBXcml0aW5nIG91dHB1dHMKd3JpdGUuY3N2KHByb2JhYmlsaXRpZXNfZHQsICcuLi8uLi8uLi9kYXRhL3RyZWVzL21ldHJpY3MvZHQvdHJlZS1wcm9iLWRlY2stbm91LmNzdicpCndyaXRlLmNzdihkZl9tZXRyaWNfZHQsICcuLi8uLi8uLi9kYXRhL3RyZWVzL21ldHJpY3MvZHQvdHJlZS1tZXRyaWMtZGVjay1ub3UuY3N2JykKCmRmX21ldHJpY19kdApwcm9iYWJpbGl0aWVzX2R0CmBgYAoK